from IPython.core.display import HTML,Image
HTML('''<script> code_show=true; function code_toggle() { if (code_show){ $('div.input').hide(); } else { $('div.input').show(); } code_show = !code_show } $( document ).ready(code_toggle); </script> <form action='javascript:code_toggle()'><input type='submit' value='Toggle Code'></form>''')
import warnings
warnings.filterwarnings('ignore')
import gc, argparse, sys, os, errno
%pylab inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt;
import seaborn as sns
#sns.set()
#sns.set_style('whitegrid')
import h5py
from PIL import Image
import os
from tqdm import tqdm_notebook as tqdm
import scipy
import sklearn
from scipy.stats import pearsonr
from scipy.io import loadmat
import IPython.display as ipd
import IPython
import librosa.display
import librosa
from pystoi import stoi
def plot_stft(audio,ax=None,n_fft=256,hop_length=128,show=False,n_mels=128,y_axis='mel'):
X = librosa.stft(audio,n_fft=n_fft,hop_length=hop_length)
if y_axis=='mel':
#x_stft_db = librosa.feature.melspectrogram(x, sr=16000,n_fft=n_fft,win_length=win_length,hop_length=hop_length)
S = librosa.feature.melspectrogram(audio, sr=16000,n_mels=n_mels,fmax=8000,n_fft=n_fft,hop_length=hop_length)
#print (S.shape)
if show:
librosa.display.specshow(librosa.power_to_db(S,
ref=np.max),
y_axis='mel',cmap='gray_r',ax=ax, fmax=8000)
else:
return librosa.power_to_db(S,ref=np.max)
else:
if show:
specshow(librosa.amplitude_to_db(abs(X)),cmap=cm.Blues,#cm.gray_r,
sr=16000,ax=ax)
else:
return librosa.amplitude_to_db(abs(X))
def MSE_pcc(A,B,ax=None):
mse =np.mean(((A - B)**2/B.var()))
pcc = pearsonr(A.ravel(),B.ravel())[0]
return mse,pcc
def analyze(predict,GT_STFT_test_spkr,audio_pred,audio_gt,mode='test'):
samples = predict.shape[0]
pcc = np.zeros([samples])
mse = np.zeros([samples])
for i in range(samples):
mse[i], pcc[i] = MSE_pcc(predict[i],GT_STFT_test_spkr[i])
#mse[i], pcc[i] = MSE_pcc(predict[i] ,GT_STFT_test_spkr[i] )
stois = []
for i in range(samples):
stois.append(stoi(audio_pred[i*interval:(i+1)*interval], audio_gt[i*interval:(i+1)*interval], 16000, extended=False))
stois = np.array(stois)
fig,ax=plt.subplots(1,3,figsize=(18,4))
ax[0].hist(mse,bins=25,color='b')
ax[0].set_title(mode+' MSE: %g(%g)' %(np.round(mse.mean(),3),np.round(mse.std(),3)))
ax[1].hist(pcc,bins=50,color='g')
ax[1].set_title(mode+' PCC: %g(%g)' %(np.round(pcc.mean(),3),np.round(pcc.std(),3)))
ax[2].hist(stois,bins=50,color='r')
ax[2].set_title(mode+' STOI: %g(%g)' %(np.round(stois.mean(),3),np.round(stois.std(),3)))
return mse,pcc,stois
def play(audio,sr=16000):
'''
audio: tensor, eg: ex['audio']
'''
if len(audio.shape) >=2:
audio = audio.ravel()
display(ipd.Audio(audio,rate=sr))
interval = 16384
wave_gt = np.load('ny742_test_wave.npy').ravel()
wave_pred = librosa.load('sample_1.pngdenoisewave.wav',sr=16000)[0]
wave_merge = np.concatenate(([np.concatenate((wave_gt[i*interval:(i+1)*interval],wave_pred[i*interval:(i+1)*interval]))\
for i in range(50)]))
select_word = np.loadtxt('wordlist.txt',dtype='str')
display(ipd.Audio(wave_gt,rate=16000))
display(ipd.Audio(wave_pred,rate=16000))
1 hammer start from 0s
2 vase start from 1.024s
3 umbrella start from 2.048s
4 gun start from 3.072s
5 key start from 4.096s
6 cow start from 5.12s
7 couch start from 6.144s
8 glasses start from 7.168s
9 airplane start from 8.192s
10 leg start from 9.216s
11 bee start from 10.24s
12 cow start from 11.26s
13 table start from 12.29s
14 airplane start from 13.31s
15 fork start from 14.34s
16 hammer start from 15.36s
17 gun start from 16.38s
18 kite start from 17.41s
19 cat start from 18.43s
20 umbrella start from 19.46s
21 gloves start from 20.48s
22 knife start from 21.5s
23 house start from 22.53s
24 arrow start from 23.55s
25 sheep start from 24.58s
26 pencil start from 25.6s
27 watch start from 26.62s
28 kite start from 27.65s
29 violin start from 28.67s
30 box start from 29.7s
31 toe start from 30.72s
32 window start from 31.74s
33 mouse start from 32.77s
34 ball start from 33.79s
35 hat start from 34.82s
36 pencil start from 35.84s
37 gun start from 36.86s
38 cake start from 37.89s
39 knife start from 38.91s
40 crown start from 39.94s
41 window start from 40.96s
42 fork start from 41.98s
43 hat start from 43.01s
44 well start from 44.03s
45 envelope start from 45.06s
46 bed start from 46.08s
47 hammer start from 47.1s
48 gun start from 48.13s
49 ball start from 49.15s
50 flag start from 50.18s
display(ipd.Audio(wave_merge,rate=16000))
row_nums = 10
col_nums = 5
fig,ax=plt.subplots(row_nums*2,col_nums,figsize=(col_nums*2,row_nums*1.5))
for i in range(row_nums):
for j in range(col_nums):
try:
ax[i*2,j].set_title(select_word[i*col_nums+j])
except:
pass
ax[i*2,j].plot(wave_gt[(i*col_nums+j)*interval:(i*col_nums+j+1)*interval])
ax[i*2+1,j].plot(wave_pred[(i*col_nums+j)*interval:(i*col_nums+j+1)*interval])
ax[i*2,j].axis('off')
ax[i*2+1,j].axis('off')
fig.tight_layout()
spec_pred = np.zeros([50,256,128])
spec_gt = np.zeros([50,256,128])
for i in tqdm(range(50)):
spec_pred[i] = plot_stft(wave_pred[i*interval:(i+1)*interval],n_fft=511,hop_length=129,ax=None ,y_axis='linear',n_mels=64)
spec_gt[i] = plot_stft(wave_gt[i*interval:(i+1)*interval],n_fft=511,hop_length=129,ax=None,y_axis='linear',n_mels=64 )
spec_concat = np.concatenate(( np.flip(spec_gt,axis=1), np.flip(spec_pred,axis=1)),axis=1)
#np.concatenate((np.swapaxes(spec_gt,2,1), np.swapaxes(spec_pred,2,1)),\
# axis=1)
row_nums = 10
col_nums = 5
fig,ax=plt.subplots(row_nums,col_nums,figsize=(col_nums*2,row_nums*5))
cmap = cm.gray_r
for i in range(row_nums):
for j in range(col_nums):
ax[i,j].imshow(spec_concat[i*col_nums+j] ,cmap=cmap)
try:
ax[i,j].set_title(select_word[i*col_nums+j])
except:
pass
plt.tight_layout()
mse_test,pcc_test,stois_test = analyze(spec_pred,spec_gt,wave_pred,wave_gt)
interval = 16384
wave_a2a = librosa.load('org.wav',sr=16000)[0]
sample_num = wave_a2a.shape[0]//interval*interval
wave_a2a=wave_a2a[:sample_num *interval]
wave_pred = librosa.load('ecog.wav',sr=16000)[0][:sample_num *interval]
select_word = np.loadtxt('wordlist.txt',dtype='str')
ecog_select_words = np.array(['house','sheep','watch','hat','envelope'])
sample_ind = array([22, 24, 26, 34, 44])#np.where(np.isin(select_word,ecog_select_words))[0]
wave_gt_select = np.concatenate(([wave_gt[i*interval:(i+1)*interval] for i in sample_ind]))
wave_merge = np.concatenate(([np.concatenate((wave_gt_select[i*interval:(i+1)*interval],wave_pred[i*interval:(i+1)*interval]))\
for i in range(5)]))
display(ipd.Audio(wave_gt_select,rate=16000))
display(ipd.Audio(wave_a2a,rate=16000))
display(ipd.Audio(wave_pred,rate=16000))
1 hammer start from 0s
2 vase start from 1.024s
3 umbrella start from 2.048s
4 gun start from 3.072s
5 key start from 4.096s
display(ipd.Audio(wave_merge,rate=16000))
row_nums = 1
col_nums = 5
fig,ax=plt.subplots(row_nums*2,col_nums,figsize=(col_nums*2,row_nums*1.5))
for i in range(row_nums):
for j in range(col_nums):
try:
ax[i*2,j].set_title(ecog_select_words[i*col_nums+j])
except:
pass
ax[i*2,j].plot(wave_gt_select[(i*col_nums+j)*interval:(i*col_nums+j+1)*interval])
ax[i*2+1,j].plot(wave_pred[(i*col_nums+j)*interval:(i*col_nums+j+1)*interval])
ax[i*2,j].axis('off')
ax[i*2+1,j].axis('off')
fig.tight_layout()
spec_pred = np.zeros([5,256,128])
spec_gt = np.zeros([5,256,128])
for i in tqdm(range(5)):
spec_pred[i] = plot_stft(wave_pred[i*interval:(i+1)*interval],n_fft=511,hop_length=129,ax=None ,y_axis='linear',n_mels=64)
spec_gt[i] = plot_stft(wave_gt_select[i*interval:(i+1)*interval],n_fft=511,hop_length=129,ax=None,y_axis='linear',n_mels=64 )
spec_concat = np.concatenate(( np.flip(spec_gt,axis=1), np.flip(spec_pred,axis=1)),axis=1)
#np.concatenate((np.swapaxes(spec_gt,2,1), np.swapaxes(spec_pred,2,1)),\
# axis=1)
row_nums = 1
col_nums = 5
fig,ax=plt.subplots(row_nums,col_nums,figsize=(col_nums*2,row_nums*5))
cmap = cm.gray_r
for i in range(row_nums):
for j in range(col_nums):
try:
ax[i,j].imshow(spec_concat[i*col_nums+j] ,cmap=cmap)
except:
ax[j].imshow(spec_concat[i*col_nums+j] ,cmap=cmap)
try:
ax[i,j].set_title(ecog_select_words[i*col_nums+j])
except:
pass
try:
ax[j].set_title(ecog_select_words[i*col_nums+j])
except:
pass
plt.tight_layout()
mse_test,pcc_test,stois_test = analyze(spec_pred,spec_gt,wave_pred,wave_gt)
jupyter nbconvert --execute /Users/james/Desktop/videolab/ECoG/report/2020/formant_summary/1023/result.ipynb --to html_toc --ExtractOutputPreprocessor.enabled=False
rm -rf /Users/james/Desktop/website/demo_web/blog/public/formant_summary
rsync -avzh /Users/james/Desktop/videolab/ECoG/report/2020/formant_summary /Users/james/Desktop/website/demo_web/blog/public/
cd /Users/james/Desktop/website/demo_web/blog
hgd